base_dir <- getwd()
in_file <- paste0(base_dir, "/data/", roi, "_QUANT.tsv")
allmarkers_outfile <- paste0(base_dir, "/data/for_seurat/allmarkers_", roi, "_qcd_withids.csv")
filteredmarkers_outfile <- paste0(base_dir, "/data/for_seurat/", roi, "_qcd_filtered_markers_withids.csv")
cluster_outfile <- paste0(base_dir, "/data/cluster_output/", roi, "_clusters.csv")

# Metric for clustering
cluster_metric <- "Cell: Median"

# QC parameters
sigsum_quantile_high <- 0.99
sigsum_quantile_low <- 0.05
bin_size <- 50
density_cutoff <- 5

# Clustering parameters
# markers_selected <- c("BCL2", "FOXP3", "LDHA", "CD11b", "CD20", "HLADR", "ICOS", ### Full quality panel
#                       "CD31", "CD4", "CD14", "CD45RO", "CD107a", "CD68", "CD57",
#                       "CD3e", "Ki67", "CD38", "CD123", "CD11c", "CD8", "PD1",
#                       "CD44", "PDL1", "CD45")

markers_selected <- c("BCL2", "FOXP3", "CD11b", "CD20", "ICOS", ### Remove functional markers
                      "CD31", "CD4", "CD14", "CD45RO", "CD107a", 
                      "CD68", "CD57", "CD3e", "CD38", "CD11c", "CD8",
                      "CD44", "CD45")
# clustering_res <- 1.5
min_clusters <- 6

ROI: reg005

QC

There are two main quality control steps:

  1. SigSum: filtering outlier cells based on the sum of all marker signals per cell.
    • Cutoff: remove cells above the 99th percentile and below the 0.1 percentile
  2. Low density cells
    • Remove “lonely” cells in low-density regions
# Read in data
roi_df <- read.table(in_file, sep = "\t", header = T, check.names = FALSE)

# Reformat column names
colnames(roi_df) <- sub(": ", ": Cell: ", colnames(roi_df)) # Insert "Cell:" into the column names for the marker genes for Seurat
colnames(roi_df) <- sub("\u00B5", "u", colnames(roi_df)) # replace mu with u

Sigsum check

# Extract the means for each marker
means_only_df <- roi_df %>% select(contains("Cell: Mean"))

# Get sum of all signals for each cell
means_only_df$sigsum <- rowSums(means_only_df)

# Calculate percentile cutoffs
sigsum_cutpoint_1 <- quantile(means_only_df$sigsum, sigsum_quantile_high)
sigsum_cutpoint_2 <- quantile(means_only_df$sigsum, sigsum_quantile_low)
print(paste("High sigsum:", sigsum_cutpoint_1, "Low sigsum:", sigsum_cutpoint_2))
## [1] "High sigsum: 89213.256294 Low sigsum: 25286.09846"
# Plot distribution of sigsums with cutoffs
ggplot(data = means_only_df, aes(x = sigsum)) +
  geom_histogram() +
  theme_bw() +
  geom_vline(aes(xintercept = sigsum_cutpoint_1)) +
  geom_vline(aes(xintercept = sigsum_cutpoint_2))

# Add metric to df
roi_df$sigsum_metric = "Cell"
roi_df$sigsum_metric[means_only_df$sigsum > sigsum_cutpoint_1] <- "SIGSUM High"
roi_df$sigsum_metric[means_only_df$sigsum < sigsum_cutpoint_2] <- "SIGSUM Low"
roi_df$sigsum <- means_only_df$sigsum

print("Sigsum metrics:")
## [1] "Sigsum metrics:"
table(roi_df$sigsum_metric, useNA = "ifany")
## 
##        Cell SIGSUM High  SIGSUM Low 
##       45440         484        2418

Bin density

# Calculate bins for cell density
roi_df$binX <- floor(roi_df$`Centroid X um`/bin_size)
roi_df$binY <- floor(roi_df$`Centroid Y um`/bin_size)

# Count cells in each bin
bincounts <- roi_df %>% count(binX, binY) %>% rename(bin_density = n)

# Add bin density metric to df
roi_df <- roi_df %>% inner_join(bincounts) %>% mutate(low_bin_density = bin_density <= density_cutoff)

top = min(roi_df$`Centroid Y um`) + max(roi_df$`Centroid Y um`)
roi_df$invertY = top - roi_df$`Centroid Y um`
# Plot cells with low density
ggplot(roi_df, aes(x = `Centroid X um`, y = invertY, fill = low_bin_density)) + ## Fix this to plot correctly
  geom_point(pch=21,colour="white") +
  ylab("Centroid Y um") +
  theme_minimal()

Clustering

Prep data to read into Seurat

# Filter on sigsum
roi_df <- roi_df %>% filter((sigsum < sigsum_cutpoint_1) & (sigsum > sigsum_cutpoint_2))
  
# Filter out low bin density
roi_df <- roi_df %>% filter(bin_density > density_cutoff)
roi_df$cellid <- paste0(roi, "_", 1:nrow(roi_df))
roi_df <- roi_df %>% select(c("cellid", colnames(roi_df)[1:ncol(roi_df) - 1]))

cols_keep <- c("cellid", "Image", "Object ID", "Name", "Class", "Parent", 
               "ROI", "Centroid X um", "Centroid Y um", "Area um^2", "Length um", 
               "Circularity", "Solidity", "Max diameter um", "Min diameter um", 
               "sigsum_metric", "sigsum", 
               "binX", "binY", "bin_density", "low_bin_density")

roi_df <- roi_df %>% select(all_of(cols_keep), contains("Cell:"))
fwrite(roi_df, allmarkers_outfile)

roi_df <- roi_df %>% select(all_of(cols_keep), contains(paste0(markers_selected, ": Cell")))
  
# Save filtered df to read into Seurat
fwrite(roi_df, filteredmarkers_outfile)

Dimension Reduction

codex.obj <- LoadAkoyaCustom(filename = filteredmarkers_outfile, type = "qupath", fov = roi)

codex.obj <- normalize_and_reduce(codex.obj)

Clustree

Clustree is used to help determine what clustering resolution should be used. Resolutions producing stable clusters (clusters which maintain more of the same cells across resolutions) are desirable.

codex.obj <- FindNeighbors(object = codex.obj, dims = c(1:length(markers_selected)), verbose = FALSE)
codex.obj <- FindClusters(object = codex.obj, verbose = FALSE, resolution = seq(0.1,1.1,0.2), n.start = 1)

tree <- clustree(codex.obj, prefix = "Akoya_snn_res.")
plot(tree)

treedata <- tree$data
tree_summary <- treedata %>% group_by(Akoya_snn_res.) %>% summarise(sc3_sd = min(sc3_stability) + sd(sc3_stability), n_cluster = n()) %>% filter(n_cluster > min_clusters)
clustering_res <- as.numeric(as.character(tree_summary$Akoya_snn_res.[which.max(tree_summary$sc3_sd)]))
if(length(clustering_res) > 1) clustering_res <- clustering_res[1]

final_nclust <- tree_summary$n_cluster[which.max(tree_summary$sc3_sd)]

treedata$`Selected resolution` <- treedata$Akoya_snn_res. == clustering_res

tree_summary <- treedata %>% group_by(Akoya_snn_res.) %>% summarise(sc3_sd = min(sc3_stability) + sd(sc3_stability), n_cluster = n()) %>% select(Resolution = Akoya_snn_res., `# clusters` = n_cluster)
kable(tree_summary, align = "r")
Resolution # clusters
0.1 6
0.3 6
0.5 11
0.7 14
0.9 15
1.1 18
ggplot(treedata, aes(x = Akoya_snn_res., y = size, fill = `Selected resolution`)) +
  geom_boxplot() +
  geom_jitter(width = 0.2) +
  theme_bw() +
  xlab("Resolution") +
  ylab("Cells per cluster") +
  ggtitle("Distribution of cluster sizes per resolution")

Increase resolution to yield more clusters

Multiply resolution by 2 to increase the number of clusters calculated

clustering_res <- clustering_res * 2
codex.obj <- cluster_seurat(codex.obj, res = clustering_res)
final_nclust <- length(table(codex.obj$seurat_clusters))

Final clustering: resolution 1.8, 27 clusters

clusters_out <- codex.obj@meta.data %>% select(seurat_clusters)
clusters_out <- cbind(clusters_out, GetTissueCoordinates(codex.obj@images[[roi]]))
clusters_out$seurat_clusters <- paste0("cluster_", clusters_out$seurat_clusters)
fwrite(clusters_out, cluster_outfile)

p1 <- DimPlot(codex.obj, label = T) + NoLegend()
p2 <- ImageDimPlot(codex.obj)
p1 + p2

plot_avg_heatmap <- function(codex.obj, order_manual = FALSE, col_ord = NULL, row_ord = NULL, scale_by = "row") {
  
  mat <- codex.obj@assays$Akoya@scale.data
  mat <- t(mat) %>% as.data.frame()
  mat$cluster <- codex.obj$seurat_clusters
  
  # Get means for each cluster
  smSub <- mat %>%
    group_by(cluster) %>%
    summarise_all(mean, na.rm = TRUE) %>%
    mutate_all(funs(replace(., is.na(.), 0))) %>%
    ungroup()
  
  # Get number of cells per cluster for annotation
  annoBarValues <- as.data.frame(table(mat$cluster))$Freq
  
  # Create matrix to be used in the heatmap
  if (scale_by == "row") {
    mat2 <- smSub %>%
    select(-c(cluster)) %>% replace(is.na(.), 0) %>%
    as.matrix()  %>% t() %>% pheatmap:::scale_rows()
  } else if (scale_by == "column") {
    mat2 <- smSub %>%
    select(-c(cluster)) %>% replace(is.na(.), 0) %>%
    as.matrix() %>% pheatmap:::scale_rows()  %>% t()
  } else print("Select a valid argument for scale_by: row or column.")
  
  ## Annotation for cluster
  ha = HeatmapAnnotation(Cluster = smSub$cluster,
                         ClusterID = anno_text(smSub$cluster, gp = gpar(fontsize = 12)))
   
  # Create barplot annotation for cluster size for bottom of heatmap
  ba = HeatmapAnnotation(CellCount = anno_barplot(annoBarValues,height=unit(2, "cm")))
  
  mat2[is.nan(mat2)] <- 0
  colnames(mat2) <- smSub$cluster
  col_fun = colorRamp2(c(-2, -0.5, 2), c("white", "#BAFBD8", "#004C23"))
  
  if (order_manual) {
    if (!is.null(row_ord)) {
      Heatmap(mat2,
            row_names_gp = gpar(fontsize = 13),
            top_annotation = ha,
            bottom_annotation = ba,
            column_order = col_ord,
            row_order = row_ord,
            border = TRUE)
      
    } else {
      Heatmap(mat2,
            row_names_gp = gpar(fontsize = 13),
            top_annotation = ha,
            bottom_annotation = ba,
            column_order = col_ord,
            border = TRUE)
    }
    
  } else {
    Heatmap(mat2,
            row_names_gp = gpar(fontsize = 13),
            top_annotation = ha,
            bottom_annotation = ba,
            border = TRUE)
  }
}

Heatmaps and ridgeplots by cluster

Top heatmaps show average marker intensity per cluster, bottom heatmaps show marker intensity on a single-cell level. Change tabs to see plots for all available markers.

Average heatmaps are scaled by column (cluster)

Phenotype markers

Scaled by column (cluster)

## Get row and column orders for subsequent heatmaps
ht_median <- plot_avg_heatmap(codex.obj, scale_by = "column")
ht_median <- draw(ht_median)

col_ord <- unlist(column_order(ht_median))
row_ord <- unlist(row_order(ht_median))

# png(paste0("plots/01-", roi, "_heatmap_12marker.png"), height = 6, width = 10, units = "in", res = 300)
# draw(ht_median)
# dev.off()

Scaled by row (channel)

ht_median_rowscale <- plot_avg_heatmap(codex.obj, scale_by = "row", col_ord = col_ord, row_ord = row_ord, order_manual = T)
ht_median_rowscale <- draw(ht_median_rowscale)

# DoHeatmap(codex.obj) # +
  # theme(axis.text.y = element_text(size = 12), title = element_text(size = 20))
RidgePlot(codex.obj, features = markers_selected, ncol = 3)

All available markers

Scaled by column (cluster)

codex.obj_allmarkers <- LoadAkoyaCustom(filename = allmarkers_outfile, type = "qupath", fov = roi)

if (all(codex.obj$cellid == codex.obj_allmarkers$cellid)) {
  codex.obj_allmarkers$seurat_clusters <- codex.obj$seurat_clusters
  Idents(codex.obj_allmarkers) <- codex.obj_allmarkers$seurat_clusters
}

codex.obj_allmarkers <- suppressMessages(NormalizeData(object = codex.obj_allmarkers, normalization.method = "CLR", margin = 2))
codex.obj_allmarkers <- suppressMessages(ScaleData(codex.obj_allmarkers))
VariableFeatures(codex.obj_allmarkers) <- rownames(codex.obj_allmarkers)

ht_all <- plot_avg_heatmap(codex.obj_allmarkers, order_manual = T, col_ord = col_ord, scale_by = "column")
ht_all <- draw(ht_all)

row_ord_all <- row_order(ht_all)

# png(paste0("plots/01-", roi, "_heatmap_20marker.png"), height = 6, width = 10, units = "in", res = 300)
# draw(ht_all)
# dev.off()

# DoHeatmap(codex.obj_allmarkers)  +
  # theme(axis.text.y = element_text(size = 12), title = element_text(size = 20))

Scaled by row (channel)

ht_all_roword <- plot_avg_heatmap(codex.obj_allmarkers, order_manual = T, col_ord = col_ord, row_ord = row_ord_all, scale_by = "row")
ht_all_roword <- draw(ht_all_roword)

RidgePlot(codex.obj_allmarkers, features = codex.obj_allmarkers@assays$Akoya@var.features, ncol = 3)